setwd("E:\\5hmc_file\\2_5hmc_yjp_bam\\ASM")
dir1="./bayes_pvalue_beta0/"
dir2="./bayes_BF/"

filea=read.csv("20201112做汇总表/all.FDR.sig.at.least.one.add.direction.same.diff.csv",head=T)
filea$id=paste(filea$Chr,filea$Start,sep = ":")
filea1=filea[filea$FDR.sig>1,]

file=read.table("E:/5hmc_file/2_5hmc_yjp_bam/ASM/20210316LIBD.eQTL处理/53K.add.GWAS.eQTL.DEG.motif.for.analysis.txt",header=T,sep="\t")
#file$id=paste(file$Chr,file$Start,sep=":")
file=file[!duplicated(file$id),]	#对TF去重
file1=file[file$pattern.not.rm.dupl.num.DC>1,]
file2=file1[file1$BF_in_DC>1,]
file3=file1[file1$BF_in_DC>10,]

group1=c("X2B_X1T","M8_M7","M6_M5","M2_M1","M48_M47","M50_M49","M28_M27","M30_M29","M26_M25","M35_M36","M18_M17","M20_M19","M22_M21","M40_M39")
i=1
fn1=paste0(dir1,group1[i],".bayes_p.txt")
file1=read.table(fn1,head=T,sep = "\t")
file1$unitID=paste(file1$chrom,file1$position,file1$ref,file1$var,sep=":")
file1=file1[file1$unitID %in% as.character(filea$unitID),]

file1=data.frame(unitID=file1$unitID,normal_reads1=file1$normal_reads1,normal_reads2=file1$normal_reads2,tumor_reads1=file1$tumor_reads1,tumor_reads2=file1$tumor_reads2)
file1=file1[file1$unitID %in% as.character(filea$unitID),]
str=unlist(strsplit(group1[i],"_"))
names(file1)=c("unitID",paste(str[1],c("reads1","reads2"),sep = "_"),paste(str[2],c("reads1","reads2"),sep = "_"))
result=file1

for(i in 2:14){
  fn1=paste0(dir1,group1[i],".bayes_p.txt")
  file1=read.table(fn1,head=T,sep = "\t")
  file1$unitID=paste(file1$chrom,file1$position,file1$ref,file1$var,sep=":")
  file1=file1[file1$unitID %in% as.character(filea$unitID),]

  file1=data.frame(unitID=file1$unitID,normal_reads1=file1$normal_reads1,normal_reads2=file1$normal_reads2,tumor_reads1=file1$tumor_reads1,tumor_reads2=file1$tumor_reads2)
  file1=file1[file1$unitID %in% as.character(filea$unitID),]
  str=unlist(strsplit(group1[i],"_"))
  names(file1)=c("unitID",paste(str[1],c("reads1","reads2"),sep = "_"),paste(str[2],c("reads1","reads2"),sep = "_"))
  result=merge(result,file1,by="unitID",all=T)
  
}
write.table(result,"20210318.Ref.Alt比例/117012.Ref.Alt.reads.txt",quote=F,row.names = T,sep="\t")

affects=c("X1T","M7","M5","M1","M47","M49","M28","M27","M30","M29","M26","M25","M35","M36")
unaffects=c("X2B","M8","M6","M2","M48","M50","M18","M17","M20","M19","M22","M21","M40","M39")

sel1=c(paste0(affects,"_reads1"),paste0(affects,"_reads2"))
sel2=c(paste0(unaffects,"_reads1"),paste0(unaffects,"_reads2"))


###可视化方面，考虑到数据中alt/ref计算方式将产生很多极值，箱型图整体会被极大值压缩，要使得图好看的话建议使用VAF画箱型图
##1.如果使用alt/ref，需去除部分极值，代码如下：
tmp=result[result$unitID %in% as.character(file3$unitID),]

data.affect=tmp[,sel1]
data.affect$reads1=rowSums(data.affect[,paste0(affects,"_reads1")],na.rm = T)
data.affect$reads2=rowSums(data.affect[,paste0(affects,"_reads2")],na.rm = T)
data.affect$ratio.alt=data.affect$reads2/(data.affect$reads1)
data.affect=data.affect[is.finite(data.affect$ratio.alt),]
data.affect=data.frame(ratio.alt=data.affect$ratio.alt,group=rep("affect",dim(data.affect)[1]))


data.unaffect=tmp[,sel2]
data.unaffect$reads1=rowSums(data.unaffect[,paste0(unaffects,"_reads1")],na.rm = T)
data.unaffect$reads2=rowSums(data.unaffect[,paste0(unaffects,"_reads2")],na.rm = T)
data.unaffect$ratio.alt=data.unaffect$reads2/(data.unaffect$reads1)
data.unaffect=data.unaffect[is.finite(data.unaffect$ratio.alt),]	###是否为有限值
data.unaffect=data.frame(ratio.alt=data.unaffect$ratio.alt,group=rep("unaffect",dim(data.unaffect)[1]))


data=rbind(data.affect,data.unaffect)
data=data[data$ratio.alt<5,]	###去除部分极值

ggplot(data,aes(x = group, y = ratio.alt, fill = group)) +
  geom_boxplot(alpha=0.7) +
  scale_y_continuous(name = "ratio.alt")+
    scale_x_discrete(name = "") +
  ggtitle("200 ASH") +
  theme_bw() +
  theme(plot.title = element_text(size = 15),
        text = element_text(size = 12),
        axis.text.x=element_text(size = 11))+scale_fill_lancet()
		
##################################################################################以下使用VAF画箱型图
###117012 ASH
data.affect=result[,sel1]
data.affect$reads1=rowSums(data.affect[,paste0(affects,"_reads1")],na.rm = T)
data.affect$reads2=rowSums(data.affect[,paste0(affects,"_reads2")],na.rm = T)

data.unaffect=result[,sel2]
data.unaffect$reads1=rowSums(data.unaffect[,paste0(unaffects,"_reads1")],na.rm = T)
data.unaffect$reads2=rowSums(data.unaffect[,paste0(unaffects,"_reads2")],na.rm = T)

affect.ref=sum(data.affect$reads1)
affect.alt=sum(data.affect$reads2)
unaffect.ref=sum(data.unaffect$reads1)
unaffect.alt=sum(data.unaffect$reads2)

rt=data.frame(matrix(NA,5,ncol=3))
row.names(rt)=c("affect","unaffect","a","b","c")
names(rt)=c("alt","ref","alt./ref.")

rt[1,1]=affect.alt
rt[1,2]=affect.ref
rt[1,3]=affect.alt/affect.ref
rt[2,1]=unaffect.alt
rt[2,2]=unaffect.ref
rt[2,3]=unaffect.alt/unaffect.ref
ft=fisher.test(matrix(c(affect.alt,unaffect.alt,affect.ref,unaffect.ref),nrow = 2,byrow = T))

rt[4,]=c("Term","fisher.OR","fisher.p.value")
rt[5,]=c("117012 ASH",ft$estimate,ft$p.value)
write.csv(rt,"./20210318.Ref.Alt比例/117012.alt.ref.csv",quote=F,row.names = T)
###61725 ASH
tmp=result[result$unitID %in% as.character(filea1$unitID),]
data.affect=tmp[,sel1]
data.affect$reads1=rowSums(data.affect[,paste0(affects,"_reads1")],na.rm = T)
data.affect$reads2=rowSums(data.affect[,paste0(affects,"_reads2")],na.rm = T)

data.unaffect=tmp[,sel2]
data.unaffect$reads1=rowSums(data.unaffect[,paste0(unaffects,"_reads1")],na.rm = T)
data.unaffect$reads2=rowSums(data.unaffect[,paste0(unaffects,"_reads2")],na.rm = T)

affect.ref=sum(data.affect$reads1)
affect.alt=sum(data.affect$reads2)
unaffect.ref=sum(data.unaffect$reads1)
unaffect.alt=sum(data.unaffect$reads2)

rt=data.frame(matrix(NA,5,ncol=3))
row.names(rt)=c("affect","unaffect","a","b","c")
names(rt)=c("alt","ref","alt./ref.")

rt[1,1]=affect.alt
rt[1,2]=affect.ref
rt[1,3]=affect.alt/affect.ref
rt[2,1]=unaffect.alt
rt[2,2]=unaffect.ref
rt[2,3]=unaffect.alt/unaffect.ref
ft=fisher.test(matrix(c(affect.alt,unaffect.alt,affect.ref,unaffect.ref),nrow = 2,byrow = T))

rt[4,]=c("Term","fisher.OR","fisher.p.value")
rt[5,]=c("61725 ASH",ft$estimate,ft$p.value)
write.csv(rt,"./20210318.Ref.Alt比例/61725.alt.ref.csv",quote=F,row.names = T)

###53425 ASH
tmp=result[result$unitID %in% as.character(file$unitID),]
data.affect=tmp[,sel1]
data.affect$reads1=rowSums(data.affect[,paste0(affects,"_reads1")],na.rm = T)
data.affect$reads2=rowSums(data.affect[,paste0(affects,"_reads2")],na.rm = T)

data.unaffect=tmp[,sel2]
data.unaffect$reads1=rowSums(data.unaffect[,paste0(unaffects,"_reads1")],na.rm = T)
data.unaffect$reads2=rowSums(data.unaffect[,paste0(unaffects,"_reads2")],na.rm = T)

affect.ref=sum(data.affect$reads1)
affect.alt=sum(data.affect$reads2)
unaffect.ref=sum(data.unaffect$reads1)
unaffect.alt=sum(data.unaffect$reads2)

rt=data.frame(matrix(NA,5,ncol=3))
row.names(rt)=c("affect","unaffect","a","b","c")
names(rt)=c("alt","ref","alt./ref.")

rt[1,1]=affect.alt
rt[1,2]=affect.ref
rt[1,3]=affect.alt/affect.ref
rt[2,1]=unaffect.alt
rt[2,2]=unaffect.ref
rt[2,3]=unaffect.alt/unaffect.ref
ft=fisher.test(matrix(c(affect.alt,unaffect.alt,affect.ref,unaffect.ref),nrow = 2,byrow = T))

rt[4,]=c("Term","fisher.OR","fisher.p.value")
rt[5,]=c("53425 ASH",ft$estimate,ft$p.value)
write.csv(rt,"./20210318.Ref.Alt比例/53425.alt.ref.csv",quote=F,row.names = T)

### 8544 ASH
tmp=result[result$unitID %in% as.character(file1$unitID),]
data.affect=tmp[,sel1]
data.affect$reads1=rowSums(data.affect[,paste0(affects,"_reads1")],na.rm = T)
data.affect$reads2=rowSums(data.affect[,paste0(affects,"_reads2")],na.rm = T)

data.unaffect=tmp[,sel2]
data.unaffect$reads1=rowSums(data.unaffect[,paste0(unaffects,"_reads1")],na.rm = T)
data.unaffect$reads2=rowSums(data.unaffect[,paste0(unaffects,"_reads2")],na.rm = T)

affect.ref=sum(data.affect$reads1)
affect.alt=sum(data.affect$reads2)
unaffect.ref=sum(data.unaffect$reads1)
unaffect.alt=sum(data.unaffect$reads2)

rt=data.frame(matrix(NA,5,ncol=3))
row.names(rt)=c("affect","unaffect","a","b","c")
names(rt)=c("alt","ref","alt./ref.")

rt[1,1]=affect.alt
rt[1,2]=affect.ref
rt[1,3]=affect.alt/affect.ref
rt[2,1]=unaffect.alt
rt[2,2]=unaffect.ref
rt[2,3]=unaffect.alt/unaffect.ref
ft=fisher.test(matrix(c(affect.alt,unaffect.alt,affect.ref,unaffect.ref),nrow = 2,byrow = T))

rt[4,]=c("Term","fisher.OR","fisher.p.value")
rt[5,]=c("8544 ASH",ft$estimate,ft$p.value)
write.csv(rt,"./20210318.Ref.Alt比例/8544.alt.ref.csv",quote=F,row.names = T)

### 807 psy-ASH
tmp=result[result$unitID %in% as.character(file2$unitID),]
data.affect=tmp[,sel1]
data.affect$reads1=rowSums(data.affect[,paste0(affects,"_reads1")],na.rm = T)
data.affect$reads2=rowSums(data.affect[,paste0(affects,"_reads2")],na.rm = T)

data.unaffect=tmp[,sel2]
data.unaffect$reads1=rowSums(data.unaffect[,paste0(unaffects,"_reads1")],na.rm = T)
data.unaffect$reads2=rowSums(data.unaffect[,paste0(unaffects,"_reads2")],na.rm = T)

affect.ref=sum(data.affect$reads1)
affect.alt=sum(data.affect$reads2)
unaffect.ref=sum(data.unaffect$reads1)
unaffect.alt=sum(data.unaffect$reads2)

rt=data.frame(matrix(NA,5,ncol=3))
row.names(rt)=c("affect","unaffect","a","b","c")
names(rt)=c("alt","ref","alt./ref.")

rt[1,1]=affect.alt
rt[1,2]=affect.ref
rt[1,3]=affect.alt/affect.ref
rt[2,1]=unaffect.alt
rt[2,2]=unaffect.ref
rt[2,3]=unaffect.alt/unaffect.ref
ft=fisher.test(matrix(c(affect.alt,unaffect.alt,affect.ref,unaffect.ref),nrow = 2,byrow = T))

rt[4,]=c("Term","fisher.OR","fisher.p.value")
rt[5,]=c("807psy-ASH",ft$estimate,ft$p.value)
write.csv(rt,"./20210318.Ref.Alt比例/807psy-ASH.alt.ref.csv",quote=F,row.names = T)

### 200 psy-ASH
tmp=result[result$unitID %in% as.character(file3$unitID),]
data.affect=tmp[,sel1]
data.affect$reads1=rowSums(data.affect[,paste0(affects,"_reads1")],na.rm = T)
data.affect$reads2=rowSums(data.affect[,paste0(affects,"_reads2")],na.rm = T)

data.unaffect=tmp[,sel2]
data.unaffect$reads1=rowSums(data.unaffect[,paste0(unaffects,"_reads1")],na.rm = T)
data.unaffect$reads2=rowSums(data.unaffect[,paste0(unaffects,"_reads2")],na.rm = T)

affect.ref=sum(data.affect$reads1)
affect.alt=sum(data.affect$reads2)
unaffect.ref=sum(data.unaffect$reads1)
unaffect.alt=sum(data.unaffect$reads2)

rt=data.frame(matrix(NA,5,ncol=3))
row.names(rt)=c("affect","unaffect","a","b","c")
names(rt)=c("alt","ref","alt./ref.")

rt[1,1]=affect.alt
rt[1,2]=affect.ref
rt[1,3]=affect.alt/affect.ref
rt[2,1]=unaffect.alt
rt[2,2]=unaffect.ref
rt[2,3]=unaffect.alt/unaffect.ref
ft=fisher.test(matrix(c(affect.alt,unaffect.alt,affect.ref,unaffect.ref),nrow = 2,byrow = T))

rt[4,]=c("Term","fisher.OR","fisher.p.value")
rt[5,]=c("200psy-ASH",ft$estimate,ft$p.value)
write.csv(rt,"./20210318.Ref.Alt比例/200psy-ASH.alt.ref.csv",quote=F,row.names = T)